library(MAST)
Loading required package: SummarizedExperiment
Loading required package: GenomicRanges
Loading required package: GenomeInfoDb
Loading required package: DelayedArray
Loading required package: matrixStats
Attaching package: ‘matrixStats’
The following objects are masked from ‘package:Biobase’:
anyMissing, rowMedians
Attaching package: ‘DelayedArray’
The following objects are masked from ‘package:matrixStats’:
colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
The following object is masked from ‘package:base’:
apply
Attaching package: ‘MAST’
The following object is masked from ‘package:stats’:
filter
for(i in 1:length(data)){
y <- data[[i]]$human
pca <- prcomp(t(y[order(-1*apply(y,1,var)),][1:1000,]),scale=T)
pca <- cbind(data[[i]]$meta,CDR=colSums(y>0),pca$x)
print(ggpairs(pca, columns=c('PC1', 'PC2', 'PC3', 'PC4', 'CDR'),
mapping=aes(color=Treatment), upper=list(continuous='cor')))
}
plot: [1,1] [====----------------------------------------------------------------------------------------------] 4% est: 0s
plot: [1,2] [========------------------------------------------------------------------------------------------] 8% est: 5s
plot: [1,3] [============--------------------------------------------------------------------------------------] 12% est: 6s
plot: [1,4] [================----------------------------------------------------------------------------------] 16% est: 6s
plot: [1,5] [====================------------------------------------------------------------------------------] 20% est: 7s
plot: [2,1] [========================--------------------------------------------------------------------------] 24% est: 7s
plot: [2,2] [===========================-----------------------------------------------------------------------] 28% est: 6s
plot: [2,3] [===============================-------------------------------------------------------------------] 32% est: 6s
plot: [2,4] [===================================---------------------------------------------------------------] 36% est: 5s
plot: [2,5] [=======================================-----------------------------------------------------------] 40% est: 6s
plot: [3,1] [===========================================-------------------------------------------------------] 44% est: 6s
plot: [3,2] [===============================================---------------------------------------------------] 48% est: 5s
plot: [3,3] [===================================================-----------------------------------------------] 52% est: 4s
plot: [3,4] [=======================================================-------------------------------------------] 56% est: 4s
plot: [3,5] [===========================================================---------------------------------------] 60% est: 4s
plot: [4,1] [===============================================================-----------------------------------] 64% est: 3s
plot: [4,2] [===================================================================-------------------------------] 68% est: 3s
plot: [4,3] [=======================================================================---------------------------] 72% est: 2s
plot: [4,4] [==========================================================================------------------------] 76% est: 2s
plot: [4,5] [==============================================================================--------------------] 80% est: 2s
plot: [5,1] [==================================================================================----------------] 84% est: 1s
plot: [5,2] [======================================================================================------------] 88% est: 1s
plot: [5,3] [==========================================================================================--------] 92% est: 1s
plot: [5,4] [==============================================================================================----] 96% est: 0s
plot: [5,5] [==================================================================================================]100% est: 0s
plot: [1,1] [====----------------------------------------------------------------------------------------------] 4% est: 0s
plot: [1,2] [========------------------------------------------------------------------------------------------] 8% est: 5s
plot: [1,3] [============--------------------------------------------------------------------------------------] 12% est: 5s
plot: [1,4] [================----------------------------------------------------------------------------------] 16% est: 5s
plot: [1,5] [====================------------------------------------------------------------------------------] 20% est: 6s
plot: [2,1] [========================--------------------------------------------------------------------------] 24% est: 6s
plot: [2,2] [===========================-----------------------------------------------------------------------] 28% est: 5s
plot: [2,3] [===============================-------------------------------------------------------------------] 32% est: 5s
plot: [2,4] [===================================---------------------------------------------------------------] 36% est: 4s
plot: [2,5] [=======================================-----------------------------------------------------------] 40% est: 4s
plot: [3,1] [===========================================-------------------------------------------------------] 44% est: 4s
plot: [3,2] [===============================================---------------------------------------------------] 48% est: 4s
plot: [3,3] [===================================================-----------------------------------------------] 52% est: 4s
plot: [3,4] [=======================================================-------------------------------------------] 56% est: 3s
plot: [3,5] [===========================================================---------------------------------------] 60% est: 3s
plot: [4,1] [===============================================================-----------------------------------] 64% est: 3s
plot: [4,2] [===================================================================-------------------------------] 68% est: 2s
plot: [4,3] [=======================================================================---------------------------] 72% est: 2s
plot: [4,4] [==========================================================================------------------------] 76% est: 2s
plot: [4,5] [==============================================================================--------------------] 80% est: 1s
plot: [5,1] [==================================================================================----------------] 84% est: 1s
plot: [5,2] [======================================================================================------------] 88% est: 1s
plot: [5,3] [==========================================================================================--------] 92% est: 1s
plot: [5,4] [==============================================================================================----] 96% est: 0s
plot: [5,5] [==================================================================================================]100% est: 0s
plot: [1,1] [====----------------------------------------------------------------------------------------------] 4% est: 0s
plot: [1,2] [========------------------------------------------------------------------------------------------] 8% est: 3s
plot: [1,3] [============--------------------------------------------------------------------------------------] 12% est: 3s
plot: [1,4] [================----------------------------------------------------------------------------------] 16% est: 3s
plot: [1,5] [====================------------------------------------------------------------------------------] 20% est: 3s
plot: [2,1] [========================--------------------------------------------------------------------------] 24% est: 3s
plot: [2,2] [===========================-----------------------------------------------------------------------] 28% est: 3s
plot: [2,3] [===============================-------------------------------------------------------------------] 32% est: 3s
plot: [2,4] [===================================---------------------------------------------------------------] 36% est: 3s
plot: [2,5] [=======================================-----------------------------------------------------------] 40% est: 3s
plot: [3,1] [===========================================-------------------------------------------------------] 44% est: 2s
plot: [3,2] [===============================================---------------------------------------------------] 48% est: 2s
plot: [3,3] [===================================================-----------------------------------------------] 52% est: 2s
plot: [3,4] [=======================================================-------------------------------------------] 56% est: 2s
plot: [3,5] [===========================================================---------------------------------------] 60% est: 2s
plot: [4,1] [===============================================================-----------------------------------] 64% est: 2s
plot: [4,2] [===================================================================-------------------------------] 68% est: 2s
plot: [4,3] [=======================================================================---------------------------] 72% est: 1s
plot: [4,4] [==========================================================================------------------------] 76% est: 1s
plot: [4,5] [==============================================================================--------------------] 80% est: 1s
plot: [5,1] [==================================================================================----------------] 84% est: 1s
plot: [5,2] [======================================================================================------------] 88% est: 1s
plot: [5,3] [==========================================================================================--------] 92% est: 0s
plot: [5,4] [==============================================================================================----] 96% est: 0s
plot: [5,5] [==================================================================================================]100% est: 0s
The CDR clearly correlates strongle to the either or both the 1st and/or 2nd principal components (PCs). This makes it clear that the CDR should always be considered when performing any modelling. Let’s us see the propotion of variance accounted for by these first PCs.
lapply(data,function(x){
y <- x$human
pca <- prcomp(t(y[order(-1*apply(y,1,var)),][1:1000,]),scale=T)
eigs <- pca$sdev^2
rbind("Prop. of Var."=eigs[1:5] / sum(eigs),"Cum. Sum. Prop. of Var."=cumsum(eigs[1:5])/sum(eigs))
})
$VHI098
[,1] [,2] [,3] [,4] [,5]
Prop. of Var. 0.08551196 0.03397286 0.02837749 0.01915273 0.01623797
Cum. Sum. Prop. of Var. 0.08551196 0.11948482 0.14786231 0.16701503 0.18325300
$VHI098.resist
[,1] [,2] [,3] [,4] [,5]
Prop. of Var. 0.05854842 0.04428817 0.0273953 0.02453004 0.0207859
Cum. Sum. Prop. of Var. 0.05854842 0.10283658 0.1302319 0.15476192 0.1755478
$HCI009
[,1] [,2] [,3] [,4] [,5]
Prop. of Var. 0.1145667 0.04506263 0.02884657 0.01993381 0.01282113
Cum. Sum. Prop. of Var. 0.1145667 0.15962932 0.18847589 0.20840970 0.22123083
for(i in 1:length(data)){
y <- cbind(data[[i]]$meta,CDR=colSums(data[[i]]$human > 0))
if("Response" %in% names(y)){
print(ggplot(y) + aes(x=Response,y=CDR,fill=Response,group=Sample) + geom_violin(draw_quantiles = c(.25,.5,.75)))
} else {
print(ggplot(y) + aes(x=Treatment,y=CDR,fill=Treatment,group=Sample) + geom_violin(draw_quantiles = c(.25,.5,.75)))
}
}
It is clear that the CDR has been driving a lot of our observations. Whether this is biological is seperate question, but regardless we should include it in our modelling attempts. Let us perform the DE analysis using edgeR, as done by Mike, but this time including the CDR.
This is the orginal model used by Mike. For this initial test, we will only use the VHIO098 dataset. For reference, Mike’s analysis returned:
TreatmentTreated vs. TreatmentUntreated -1 1398 0 9854 1 2210
design.mat <- model.matrix(~ 0 + Treatment + Sample, data=data$VHI098$meta)
contrast.mat <- makeContrasts(TreatmentTreated-TreatmentUntreated, levels=design.mat)
vfit <- lmFit(data$VHI098$human, design.mat)
Coefficients not estimable: SampleSIGAH11
Partial NA coefficients for 13462 probe(s)
vfit <- contrasts.fit(vfit, contrasts=contrast.mat)
efit <- eBayes(vfit)
DE <- as.data.table(topTable(efit,p.value = 0.05, n=Inf),keep.rownames=TRUE)
DE[,direction:=factor(logFC>0)]
levels(DE$direction) <- c("down.reg","up.reg")
models <- list()
models$niave <- DE
DE[,.N,direction]
As expected, this is the same as what Mike obtained.
Let’s first look at the relative AICs when adding this factor.
aic <- with(cbind(data$VHI098$meta,CDR=colSums(data$VHI098$human>0)),
selectModel(data$VHI098$human, list(
model.matrix(~ 0 + Treatment + Sample),
model.matrix(~ 0 + Treatment + Sample + CDR)
), criterion="aic"))
Coefficients not estimable: SampleSIGAH11
Partial NA coefficients for 13462 probe(s)
Coefficients not estimable: SampleSIGAH11
Partial NA coefficients for 13462 probe(s)
table(aic$pref)
1 2
2853 10609
table(exp(as.numeric(diff(t(aic$IC[aic$pref=="2",])))/2) < 0.05) #FDR correction needed?
FALSE TRUE
2281 8328
The AIC shows that the majority of genes are fitted better by the model that incorporates the CDR. Furthermore, the propabilities that this model is an improvement for this “subset” leaves no doubts as to using it.
design.mat <- model.matrix(~ 0 + Treatment + Sample + CDR,
data=cbind(data$VHI098$meta,CDR=colSums(data$VHI098$human>0)))
contrast.mat <- makeContrasts(TreatmentTreated-TreatmentUntreated, levels=design.mat)
vfit <- lmFit(data$VHI098$human, design.mat)
Coefficients not estimable: SampleSIGAH11
Partial NA coefficients for 13462 probe(s)
vfit <- contrasts.fit(vfit, contrasts=contrast.mat)
efit <- eBayes(vfit)
DE <- as.data.table(topTable(efit,p.value = 0.05, n=Inf),keep.rownames=TRUE)
DE[,direction:=factor(logFC>0)]
levels(DE$direction) <- c("down.reg","up.reg")
models$niave.CDR <- DE
DE[,.N,direction]
We see a large reduction in the number of up.reg genes which is not quite matched matched by an increase in the number of down.reg genes. In total, the extra coeffienct reduces the total number of DE genes by roughly 600. This is a good indication that it should be included in the model, as the number of samples is such that are we are fairly safe from overfitting. What is the overlap between these models?
x <- merge(models$niave[,.(rn,ID,niave=direction)],
models$niave.CDR[,.(rn,ID,niave.CDR=direction)],
by=c("rn","ID"),all=TRUE,)
levels(x$niave) <- c(-1,1,0)
levels(x$niave.CDR) <- c(-1,1,0)
x[is.na(x)] <- 0
vennDiagram(x[,.(niave,niave.CDR)],cex=0.8,include="up",main="Up.reg.DE")
vennDiagram(x[,.(niave,niave.CDR)],cex=0.8,include="down",main="Down.reg.DE")
For this model, let us model the expression as either on (!=0) or off (==0). Then let’s use logistic regression to model the treatment effect.
data.binary <- as.data.table(data$VHI098$human != 0)
data.binary[,row.id:=.I]
DE <- with(data$VHI098$meta,
data.binary[, as.list(summary(glm(unlist(.SD)~Treatment+Sample+cdr))$coefficients[2,]), row.id]
)
DE$row.id <- rownames(data$VHI098$human)
DE[,fdr:=p.adjust(`Pr(>|t|)`, 'fdr')]
DE <- DE[fdr<0.05]
DE[,direction:=factor(Estimate>0)]
levels(DE$direction) <- c("down.reg","up.reg")
models$logistic <- DE
DE[,.N,direction]
We need to first determine an appropiate threshold to below which we set the value to zero.
thres <- thresholdSCRNACountMatrix(data$VHI098$human, nbins = 20, min_per_bin = 30)
(0.427,0.729] (0.729,0.903] (0.903,1.09] (1.09,1.31] (1.31,1.54] (1.54,1.79] (1.79,2.08] (2.08,2.39] (2.39,2.73] (2.73,3.1] (3.1,3.52] (3.52,3.97] (3.97,4.47] (4.47,5.02]
0 0 0 0 0 0 0 0 0 0 0 0 0 0
(5.02,6.3] (6.3,8.73]
0 0
par(mfrow=c(5,4))
print(plot(thres))
$ask
[1] FALSE
Mike’s normalisation has apparently already done this and at all median expression binnings, the threshold is reported as zero.
expMat <- as.matrix(data$VHI098$human)
rownames(expMat) <- make.names(rownames(expMat),unique = T)
cData <- as.data.frame(cbind(data$VHI098$meta,CDR=colSums(data$VHI098$human>0)))
rownames(cData) <- colnames(expMat)
cData$Treatment <- factor(cData$Treatment,levels = c("Untreated","Treated"))
fData <- data.frame(gene=rownames(expMat))
rownames(fData) <- rownames(expMat)
x <- FromMatrix(expMat,cData,fData)
`fData` has no primerid. I'll make something up.
`cData` has no wellKey. I'll make something up.
y <- zlm(~ Treatment + Sample + CDR, x)
Coefficients SampleSIGAH11 are never estimible and will be dropped..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
Done!
z <- summary(y,doLRT="TreatmentTreated")
Combining coefficients and standard errors
Calculating log-fold changes
Calculating likelihood ratio tests
Refitting on reduced model...
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
Done!
z <- z$datatable
DE <- merge(
z[contrast=='TreatmentTreated' & component=='H',.(primerid, `Pr(>Chisq)`)],
z[contrast=='TreatmentTreated' & component=='logFC', .(primerid, coef, ci.hi, ci.lo)],
by='primerid')
DE[,fdr:=p.adjust(`Pr(>Chisq)`, 'fdr')]
DE <- DE[fdr<0.05]
DE[,direction:=factor(coef>0)]
levels(DE$direction) <- c("down.reg","up.reg")
models$zero.inf <- DE
DE[,.N,direction]
Note that the above model calls something as DE if the hurdle (gene being on or off) is signifgantly altered w.r.t. to the contrast tested. There does not have to be a change in the expression of genes that are “on”.
What is the overlap w.r.t. the other models?
x <- merge(models$niave[,.(rn,ID,niave=direction)],
models$niave.CDR[,.(rn,ID,niave.CDR=direction)],
by=c("rn","ID"),all=TRUE,)
x <- merge(x,
models$zero.inf[,.(primerid,zero.inf=direction)],
by.x=c("ID"),by.y=c("primerid"),all=TRUE)
x <- merge(x,
models$logistic[,.(row.id,logistic=direction)],
by.x=c("ID"),by.y=c("row.id"),all=TRUE)
levels(x$niave) <- c(-1,1,0)
levels(x$niave.CDR) <- c(-1,1,0)
levels(x$zero.inf) <- c(-1,1,0)
levels(x$logistic) <- c(-1,1,0)
x[is.na(x)] <- 0
vennDiagram(x[,.(niave,niave.CDR,zero.inf,logistic)],cex=0.8,include="up",main="Up.reg.DE")
vennDiagram(x[,.(niave,niave.CDR,zero.inf,logistic)],cex=0.8,include="down",main="Down.reg.DE")
And let’s see what happends when we enforce a LFC threshold.
x <- merge(models$niave[abs(logFC)>.5,.(rn,ID,niave=direction)],
models$niave.CDR[abs(logFC)>.5,.(rn,ID,niave.CDR=direction)],
by=c("rn","ID"),all=TRUE,)
x <- merge(x,
models$zero.inf[abs(coef)>.5,.(primerid,zero.inf=direction)],
by.x=c("ID"),by.y=c("primerid"),all=TRUE)
levels(x$niave) <- c(-1,1,0)
levels(x$niave.CDR) <- c(-1,1,0)
levels(x$zero.inf) <- c(-1,1,0)
x[is.na(x)] <- 0
vennDiagram(x[,.(niave,niave.CDR,zero.inf)],cex=0.8,include="up",main="Up.reg.DE")
vennDiagram(x[,.(niave,niave.CDR,zero.inf)],cex=0.8,include="down",main="Down.reg.DE")
Finally, let’s make a few pretty plots of the most DE genes for each model.
DE <- models$niave
i <- which(rownames(data$VHI098$human) %in% DE[order(adj.P.Val)][abs(logFC)>.5][1:16,ID])
x <- merge(melt(data$VHI098$human[i,]),data$VHI098$meta,by.x=c("Var2"),by.y=c("sub.sample"))
ggplot(x) + aes(x=Treatment,y=value,group=Sample,fill=Treatment) + geom_violin(draw_quantiles = c(.5)) + facet_wrap(~Var1,scale='free_y')
DE <- models$niave
i <- which(rownames(data$VHI098$human) %in% DE[order(adj.P.Val)][abs(logFC)>.5][1:50,ID])
j <- data$VHI098$meta[,order(Treatment)]
labels.col <- rep("",length(i))
ann.col <- as.data.frame(data$VHI098$meta[j,.(Sample,Treatment)])
rownames(ann.col) <- data$VHI098$meta[j,sub.sample]
pheatmap(data$VHI098$human[i,j],labels_col=labels.col,annotation_col = ann.col,cluster_cols = F)
DE <- models$niave.CDR
i <- which(rownames(data$VHI098$human) %in% DE[order(adj.P.Val)][abs(logFC)>.5][1:16,ID])
x <- merge(melt(data$VHI098$human[i,]),data$VHI098$meta,by.x=c("Var2"),by.y=c("sub.sample"))
ggplot(x) + aes(x=Treatment,y=value,group=Sample,fill=Treatment) + geom_violin(draw_quantiles = c(.5)) + facet_wrap(~Var1,scale='free_y')
DE <- models$niave.CDR
i <- which(rownames(data$VHI098$human) %in% DE[order(adj.P.Val)][abs(logFC)>.5][1:50,ID])
j <- data$VHI098$meta[,order(Treatment)]
labels.col <- rep("",length(i))
ann.col <- as.data.frame(data$VHI098$meta[j,.(Sample,Treatment)])
rownames(ann.col) <- data$VHI098$meta[j,sub.sample]
pheatmap(data$VHI098$human[i,j],labels_col=labels.col,annotation_col = ann.col,cluster_cols = F)
DE <- models$zero.inf
i <- which(rownames(data$VHI098$human) %in% DE[order(fdr)][abs(coef)>.5][1:16,primerid])
x <- merge(melt(data$VHI098$human[i,]),data$VHI098$meta,by.x=c("Var2"),by.y=c("sub.sample"))
ggplot(x) + aes(x=Treatment,y=value,group=Sample,fill=Treatment) + geom_violin(draw_quantiles = c(.5)) + facet_wrap(~Var1,scale='free_y')
DE <- models$zero.inf
i <- which(rownames(data$VHI098$human) %in% DE[order(fdr)][abs(coef)>.5][1:50,primerid])
j <- data$VHI098$meta[,order(Treatment)]
labels.col <- rep("",length(i))
ann.col <- as.data.frame(data$VHI098$meta[j,.(Sample,Treatment)])
rownames(ann.col) <- data$VHI098$meta[j,sub.sample]
pheatmap(data$VHI098$human[i,j],labels_col=labels.col,annotation_col = ann.col,cluster_cols = F)